This Python Jupyter notebook can parse watercare bills and extract the usage data. It can be used to generate a CSV file with the usage data.

You can download PDFs from https://myaccount.watercare.co.nz/bills-and-payments. Set your start date filter as far back as it'll go, and download all the PDFs into a folder called pdfs. Here's a JS snippet you can put in your browser console to save you clicking them all:

var links = document.querySelectorAll('button.flex.hover\\:bg-blue-100')
console.log(`Found ${links.length} links`)
var index = 0
setInterval(function() {
    if (index >= links.length) {
        return
    }
    console.log(index)
    links[index].click()
    index++
}, 250)
In [1]:
import pandas as pd # Tabular data
pd.options.plotting.backend = "plotly"
import plotly.express as px # Plotting
from glob import glob # Finding files
import pdftotext # PDF parsing
from tqdm.auto import tqdm # Progress bars
import re # Regular expressions
from datetime import datetime
pd.set_option("display.max_columns", None)
pd.set_option("display.max_rows", None)

files = glob("pdfs/*.pdf")
len(files)
Out[1]:
64
In [2]:
def read_PDF(filename):
    with open(filename, "rb") as f:
        pdf = pdftotext.PDF(f)
        text = ""
        for page_text in pdf:
            text += page_text.strip()
        return text

def parse_number(amount):
    if amount.endswith(" cr"):
        return -float(amount[:-3])
    return float(amount)

def parse_text(text):
    if "$500 gift card" in text:
        # This breaks the dollar_amounts regex
        text = text.replace("$ 50", "")
    dates = re.findall(r'(\d{2} [A-Z][a-z]{2} \d{4})', text)
    dollar_amounts = [parse_number(d) for d in re.findall(r'\$ ([\d\.]+(?: cr)?)', text)]
    usage = [parse_number(d) for d in re.findall(r'([\d\.]+) kL', text)]
    unit_rates = [parse_number(d) for d in re.findall(r'\$([\d\.]+)/kL', text)]
    if len(unit_rates) == 4:
        # Rate changed
        unit_rates = unit_rates[::2]

    consumption_period = re.search(r"Consumption period (\d+) days", text).group(1)
    this_reading_match = re.search(r"This reading\s+(\d{2}-\w{3}-\d{2})\s+(\d+)\s*(Estimate|Actual)?", text)
    this_reading_date = this_reading_match.group(1)
    this_reading_date = datetime.strptime(this_reading_date, "%d-%b-%y")
    this_reading_value = int(this_reading_match.group(2))
    this_reading_type = this_reading_match.group(3)

    return {
        "Invoice date": dates[0],
        "Due date": dates[1],
        "Total due ($)": dollar_amounts[0],
        "Opening balance ($)": dollar_amounts[1],
        "Payments received ($)": dollar_amounts[2],
        "Balance still owing ($)": dollar_amounts[3],
        "Water consumption ($)": dollar_amounts[4],
        "Wastewater consumption ($)": dollar_amounts[5],
        "Wastewater fixed ($)": dollar_amounts[6],
        "Balance of current charges ($)": dollar_amounts[7],
        "GST ($)": dollar_amounts[8],
        "Consumption period (days)": int(consumption_period),
        "Reading date": this_reading_date,
        "Reading value (kL)": this_reading_value,
        "Reading type": this_reading_type,
        "Water consumption (kL)": usage[0],
        "Wastewater consumption (kL)": usage[1],
        "Wastewater rate (%)": float(re.search(r"@(\d+.\d+)%", text).group(1)),
        "Water unit rate ($/kL)": unit_rates[0],
        "Wastewater unit rate ($/kL)": unit_rates[1],
    }

results = []
for f in tqdm(files):
    text = read_PDF(f)
    results.append(parse_text(text))
df = pd.DataFrame(results)
for col in ['Invoice date', 'Due date', 'Reading date']:
    df[col] = pd.to_datetime(df[col])
df.sort_values("Invoice date", inplace=True, ascending=False)
df
  0%|          | 0/64 [00:00<?, ?it/s]
Out[2]:
Invoice date Due date Total due ($) Opening balance ($) Payments received ($) Balance still owing ($) Water consumption ($) Wastewater consumption ($) Wastewater fixed ($) Balance of current charges ($) GST ($) Consumption period (days) Reading date Reading value (kL) Reading type Water consumption (kL) Wastewater consumption (kL) Wastewater rate (%) Water unit rate ($/kL) Wastewater unit rate ($/kL)
5 2024-09-06 2024-09-27 484.90 40.24 -40.24 0.00 192.78 263.24 28.88 484.90 63.25 34 2024-09-03 1200 Actual 90.00 70.65 78.5 2.142 3.726
2 2024-07-31 2024-08-21 40.24 -2.96 0.00 -2.96 8.57 11.70 22.93 43.20 5.63 27 2024-07-31 1110 Estimate 4.00 3.14 78.5 2.142 3.726
28 2024-07-05 2024-07-26 0.00 -54.22 0.00 -54.22 9.99 13.63 27.64 51.26 6.69 35 2024-07-04 1106 Actual 5.00 3.92 78.5 1.998 3.476
13 2024-05-30 2024-06-20 0.00 -86.57 0.00 -86.57 5.99 8.20 18.16 32.35 4.22 23 2024-05-30 1101 Estimate 3.00 2.36 78.5 1.998 3.476
54 2024-05-08 2024-05-29 0.00 45.73 -45.73 0.00 -49.95 -68.20 31.58 -86.57 -11.29 40 2024-05-07 1098 Actual 25.00 19.62 78.5 1.998 3.476
57 2024-03-28 2024-04-18 45.73 74.91 -74.91 0.00 11.99 16.37 17.37 45.73 5.96 22 2024-03-28 1123 Estimate 6.00 4.71 78.5 1.998 3.476
61 2024-03-07 2024-03-28 74.91 67.79 -67.79 0.00 19.98 27.29 27.64 74.91 9.77 35 2024-03-06 1117 Estimate 10.00 7.85 78.5 1.998 3.476
3 2024-01-31 2024-02-21 67.79 66.21 -66.21 0.00 17.98 24.54 25.27 67.79 8.84 32 2024-01-31 1107 Estimate 9.00 7.06 78.5 1.998 3.476
40 2024-01-03 2024-01-24 66.21 59.92 -59.92 0.00 17.98 24.54 23.69 66.21 8.64 30 2023-12-30 1098 Estimate 9.00 7.06 78.5 1.998 3.476
0 2023-11-30 2023-12-21 59.92 74.12 -74.12 0.00 15.98 21.83 22.11 59.92 7.82 28 2023-11-30 1089 Estimate 8.00 6.28 78.5 1.998 3.476
48 2023-11-03 2023-11-24 74.12 60.71 -60.71 0.00 19.98 27.29 26.85 74.12 9.67 34 2023-11-02 1081 Estimate 10.00 7.85 78.5 1.998 3.476
60 2023-09-29 2023-10-20 60.71 57.59 -57.59 0.00 15.98 21.83 22.90 60.71 7.92 29 2023-09-29 1071 Estimate 8.00 6.28 78.5 1.998 3.476
9 2023-09-01 2023-09-22 57.59 70.65 -70.65 0.00 13.99 19.12 24.48 57.59 7.51 31 2023-08-31 1063 Actual 7.00 5.50 78.5 1.998 3.476
58 2023-07-31 2023-08-21 70.65 60.53 -60.53 0.00 20.08 27.42 23.15 70.65 9.22 32 2023-07-31 1056 Estimate 11.00 8.64 78.5 1.825 3.174
47 2023-07-03 2023-07-24 60.53 74.22 -74.22 0.00 16.42 22.41 21.70 60.53 7.90 30 2023-06-29 1045 Actual 9.00 7.06 78.5 1.825 3.174
38 2023-05-30 2023-06-20 74.22 85.75 -85.75 0.00 21.90 29.90 22.42 74.22 9.68 31 2023-05-30 1036 Estimate 12.00 9.42 78.5 1.825 3.174
52 2023-05-01 2023-05-22 85.75 61.25 -61.25 0.00 27.38 37.39 20.98 85.75 11.18 29 2023-04-29 1024 Actual 15.00 11.78 78.5 1.825 3.174
62 2023-03-31 2023-04-21 61.25 37.52 -37.52 0.00 16.42 22.41 22.42 61.25 7.99 31 2023-03-31 1009 Estimate 9.00 7.06 78.5 1.825 3.174
11 2023-03-01 2023-03-22 37.52 86.47 -86.47 0.00 7.30 9.97 20.25 37.52 4.89 28 2023-02-28 1000 Actual 4.00 3.14 78.5 1.825 3.174
19 2023-01-31 2023-02-21 86.47 66.31 -66.31 0.00 25.55 34.88 26.04 86.47 11.28 36 2023-01-31 996 Estimate 14.00 10.99 78.5 1.825 3.174
51 2023-01-04 2023-01-25 66.31 63.42 -63.42 0.00 20.08 27.42 18.81 66.31 8.65 26 2022-12-26 982 Actual 11.00 8.64 78.5 1.825 3.174
21 2022-11-30 2022-12-21 63.42 71.37 -71.37 0.00 18.25 24.92 20.25 63.42 8.27 28 2022-11-30 971 Estimate 10.00 7.85 78.5 1.825 3.174
42 2022-11-03 2022-11-24 71.37 68.48 -68.48 0.00 20.08 27.42 23.87 71.37 9.31 33 2022-11-02 961 Actual 11.00 8.64 78.5 1.825 3.174
45 2022-09-30 2022-10-21 68.48 54.83 -54.83 0.00 20.08 27.42 20.98 68.48 8.93 29 2022-09-30 950 Estimate 11.00 8.64 78.5 1.825 3.174
43 2022-09-02 2022-09-23 54.83 77.79 -77.79 0.00 12.78 17.46 24.59 54.83 7.15 34 2022-09-01 939 Actual 7.00 5.50 78.5 1.825 3.174
24 2022-07-29 2022-08-19 77.79 116.49 -116.49 0.00 25.55 34.88 17.36 77.79 10.15 24 2022-07-29 932 Estimate 14.00 10.99 78.5 1.825 3.174
41 2022-07-06 2022-07-27 116.49 70.70 -70.70 0.00 39.24 53.57 23.68 116.49 15.19 35 2022-07-05 918 Actual 23.00 18.06 78.5 1.706 2.966
8 2022-05-31 2022-06-21 70.70 99.67 -99.67 0.00 22.18 30.25 18.27 70.70 9.22 27 2022-05-31 895 Estimate 13.00 10.20 78.5 1.706 2.966
44 2022-05-05 2022-05-26 99.67 58.61 -58.61 0.00 32.41 44.25 23.01 99.67 13.00 34 2022-05-04 882 Actual 19.00 14.92 78.5 1.706 2.966
30 2022-03-31 2022-04-21 58.61 80.16 -80.16 0.00 17.06 23.28 18.27 58.61 7.64 27 2022-03-31 863 Estimate 10.00 7.85 78.5 1.706 2.966
23 2022-03-07 2022-03-28 80.16 42.46 -42.46 0.00 23.88 32.60 23.68 80.16 10.46 35 2022-03-04 853 Actual 14.00 10.99 78.5 1.706 2.966
4 2022-01-28 2022-02-18 42.46 74.80 -74.80 0.00 11.94 16.31 14.21 42.46 5.54 21 2022-01-28 839 Estimate 7.00 5.50 78.5 1.706 2.966
55 2022-01-10 2022-02-01 74.80 63.35 -63.35 0.00 20.47 27.94 26.39 74.80 9.76 39 2022-01-07 832 Actual 12.00 9.42 78.5 1.706 2.966
22 2021-11-29 2021-12-20 63.35 66.73 -66.73 0.00 18.77 25.63 18.95 63.35 8.26 28 2021-11-29 820 Estimate 11.00 8.64 78.5 1.706 2.966
36 2021-11-02 2021-11-23 66.73 72.73 -72.73 0.00 18.77 25.63 22.33 66.73 8.70 33 2021-11-01 809 Actual 11.00 8.64 78.5 1.706 2.966
46 2021-09-29 2021-10-20 72.73 74.08 -74.08 0.00 22.18 30.25 20.30 72.73 9.49 30 2021-09-29 798 Estimate 13.00 10.20 78.5 1.706 2.966
63 2021-08-31 2021-09-21 74.08 67.98 -67.98 0.00 22.18 30.25 21.65 74.08 9.66 32 2021-08-30 785 Estimate 13.00 10.20 78.5 1.706 2.966
10 2021-07-29 2021-08-19 67.98 80.57 -80.57 0.00 20.72 28.27 18.99 67.98 8.87 30 2021-07-29 772 Estimate 13.00 10.20 78.5 1.594 2.772
27 2021-06-30 2021-07-21 80.57 51.64 -51.64 0.00 25.50 34.82 20.25 80.57 10.51 32 2021-06-29 759 Actual 16.00 12.56 78.5 1.594 2.772
16 2021-05-28 2021-06-18 51.64 57.32 -57.32 0.00 14.35 19.57 17.72 51.64 6.74 28 2021-05-28 743 Estimate 9.00 7.06 78.5 1.594 2.772
34 2021-05-03 2021-05-24 57.32 61.73 -61.73 0.00 15.94 21.76 19.62 57.32 7.48 31 2021-04-30 734 Actual 10.00 7.85 78.5 1.594 2.772
1 2021-03-30 2021-04-20 61.73 51.64 -51.64 0.00 17.53 23.95 20.25 61.73 8.05 32 2021-03-30 724 Estimate 11.00 8.64 78.5 1.594 2.772
50 2021-03-01 2021-03-22 51.64 56.05 -56.05 0.00 14.35 19.57 17.72 51.64 6.74 28 2021-02-26 713 Actual 9.00 7.06 78.5 1.594 2.772
31 2021-01-29 2021-02-19 56.05 61.10 -61.10 0.00 15.94 21.76 18.35 56.05 7.31 29 2021-01-29 704 Estimate 10.00 7.85 78.5 1.594 2.772
15 2021-01-05 2021-01-26 61.10 61.10 -61.10 0.00 17.53 23.95 19.62 61.10 7.97 31 2020-12-31 694 Actual 11.00 8.64 78.5 1.594 2.772
18 2020-11-30 2020-12-21 61.10 64.23 -64.23 0.00 17.53 23.95 19.62 61.10 7.97 31 2020-11-30 683 Estimate 11.00 8.64 78.5 1.594 2.772
29 2020-11-02 2020-11-23 64.23 58.58 -58.58 0.00 19.13 26.11 18.99 64.23 8.38 30 2020-10-30 672 Actual 12.00 9.42 78.5 1.594 2.772
56 2020-09-30 2020-10-21 58.58 48.51 -48.51 0.00 15.94 21.76 20.88 58.58 7.64 33 2020-09-30 660 Estimate 10.00 7.85 78.5 1.594 2.772
7 2020-08-31 2020-09-21 48.51 56.69 -56.69 0.00 12.75 17.41 18.35 48.51 6.33 29 2020-08-28 650 Actual 8.00 6.28 78.5 1.594 2.772
49 2020-07-30 2020-08-20 56.69 49.15 -49.15 0.00 15.94 21.76 18.99 56.69 7.39 30 2020-07-30 642 Estimate 10.00 7.85 78.5 1.594 2.772
14 2020-07-01 2020-07-22 49.15 62.01 -62.01 0.00 12.44 16.98 19.73 49.15 6.41 32 2020-06-30 632 Actual 8.00 6.28 78.5 1.555 2.704
12 2020-05-29 2020-06-19 62.01 63.24 -63.24 0.00 18.66 25.47 17.88 62.01 8.09 29 2020-05-29 624 Estimate 12.00 9.42 78.5 1.555 2.704
20 2020-05-01 2020-05-22 63.24 70.60 -70.60 0.00 18.66 25.47 19.11 63.24 8.25 31 2020-04-30 612 Actual 12.00 9.42 78.5 1.555 2.704
6 2020-03-30 2020-04-20 70.60 57.72 -57.72 0.00 21.77 29.72 19.11 70.60 9.21 31 2020-03-30 600 Estimate 14.00 10.99 78.5 1.555 2.704
26 2020-03-02 2020-03-23 57.72 74.28 -74.28 0.00 17.10 23.36 17.26 57.72 7.53 28 2020-02-28 586 Actual 11.00 8.64 78.5 1.555 2.704
53 2020-01-31 2020-02-21 74.28 93.28 -93.28 0.00 23.32 31.85 19.11 74.28 9.69 31 2020-01-31 575 Estimate 15.00 11.78 78.5 1.555 2.704
59 2020-01-03 2020-01-24 93.28 54.66 -54.66 0.00 31.10 42.45 19.73 93.28 12.17 32 2019-12-31 560 Actual 20.00 15.70 78.5 1.555 2.704
32 2019-12-05 2019-12-30 54.66 0.00 0.00 0.00 15.55 21.23 17.88 54.66 7.13 29 2019-11-29 540 Estimate 10.00 7.85 78.5 1.555 2.704
35 2019-11-05 2019-11-26 52.23 66.94 -66.94 0.00 14.00 19.12 19.11 52.23 6.81 31 2019-10-31 530 Actual 9.00 7.07 78.5 1.555 2.704
39 2019-10-02 2019-10-23 66.94 58.96 -58.96 0.00 20.22 27.61 19.11 66.94 8.73 31 2019-09-30 521 Estimate 13.00 10.21 78.5 1.555 2.704
33 2019-09-04 2019-09-25 58.96 73.67 -73.67 0.00 17.11 23.36 18.49 58.96 7.69 30 2019-08-30 508 Actual 11.00 8.64 78.5 1.555 2.704
25 2019-08-02 2019-08-23 73.67 79.33 -79.33 0.00 23.33 31.85 18.49 73.67 9.61 30 2019-07-31 497 Estimate 15.00 11.78 78.5 1.555 2.704
37 2019-07-04 2019-07-25 79.33 67.93 -67.93 0.00 25.81 34.98 18.54 79.33 10.35 31 2019-07-01 482 Actual 16.45 0.55 78.5 1.517 2.618
17 2019-06-05 2019-06-26 67.93 76.86 -76.86 0.00 21.24 28.77 17.92 67.93 8.86 30 2019-05-31 465 Estimate 14.00 10.99 78.5 1.517 2.618
In [3]:
px.scatter(df, x="Reading date", y="Reading value (kL)", color="Reading type", title="Reading values (kL)", trendline="ols")
In [4]:
px.bar(df, x="Reading date", y="Water consumption (kL)")
In [5]:
df["Average daily usage (L)"] = df["Water consumption (kL)"] * 1000 / df["Consumption period (days)"]
px.bar(df, x="Reading date", y="Average daily usage (L)")
In [6]:
px.bar(df, x="Reading date", y="Balance of current charges ($)")
In [7]:
px.line(df, x="Reading date", y=["Water unit rate ($/kL)", "Wastewater unit rate ($/kL)"], title="Unit rates ($/kL)")